（转）linux内存管理之伙伴系统（内存分配）

2023-03-29 18:22| 来源: 网络整理| 查看: 265

一、Linux伙伴系统分配器

伙伴系统分配器大体上分为两类。__get_free_pages()类函数返回分配的第一个页面的线性地址；alloc_pages()类函数返回页面描述符地址。不管以哪种函数进行分配，最终会调用alloc_pages()进行分配页面。

为清楚了解其分配制度，先给个伙伴系统数据的存储框图

也就是每个order对应一个free_area结构，free_area以不同的类型以链表的方式存储这些内存块。

二、主分配函数

下面我们来看这个函数(在UMA模式下)

[cpp] view plain copy print ? #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) [cpp] view plain copy print ? static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { /* Unknown node is current node */ if (nid nid = numa_node_id(); return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); } [cpp] view plain copy print ? static inline struct page * __alloc_pages(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist) { return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL); }

上层分配函数__alloc_pages_nodemask()

[cpp] view plain copy print ? /* * This is the 'heart' of the zoned buddy allocator. */ /*上层分配器运用了各种方式进行*/ struct page * __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, nodemask_t *nodemask) { enum zone_type high_zoneidx = gfp_zone(gfp_mask); struct zone *preferred_zone; struct page *page; /* Convert GFP flags to their corresponding migrate type */ int migratetype = allocflags_to_migratetype(gfp_mask); gfp_mask &= gfp_allowed_mask; /*调试用*/ lockdep_trace_alloc(gfp_mask); /*如果__GFP_WAIT标志设置了，需要等待和重新调度*/ might_sleep_if(gfp_mask & __GFP_WAIT); /*没有设置对应的宏*/ if (should_fail_alloc_page(gfp_mask, order)) return NULL; /* * Check the zones suitable for the gfp_mask contain at least one * valid zone. It's possible to have an empty zonelist as a result * of GFP_THISNODE and a memoryless node */ if (unlikely(!zonelist->_zonerefs->zone)) return NULL; /* The preferred zone is used for statistics later */ /* 英文注释所说*/ first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone); if (!preferred_zone) return NULL; /* First allocation attempt */ /*从pcp和伙伴系统中正常的分配内存空间*/ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET, preferred_zone, migratetype); if (unlikely(!page))/*如果上面没有分配到空间，调用下面函数慢速分配，允许等待和回收*/ page = __alloc_pages_slowpath(gfp_mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, migratetype); /*调试用*/ trace_mm_page_alloc(page, order, gfp_mask, migratetype); return page; }

三、从pcp和伙伴系统中正常的分配内存空间

函数get_page_from_freelist()

[cpp] view plain copy print ? /* * get_page_from_freelist goes through the zonelist trying to allocate * a page. */ /*为分配制定内存空间，遍历每个zone*/ static struct page * get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, struct zonelist *zonelist, int high_zoneidx, int alloc_flags, struct zone *preferred_zone, int migratetype) { struct zoneref *z; struct page *page = NULL; int classzone_idx; struct zone *zone; nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ int zlc_active = 0; /* set if using zonelist_cache */ int did_zlc_setup = 0; /* just call zlc_setup() one time */ /*zone对应的下标*/ classzone_idx = zone_idx(preferred_zone); zonelist_scan: /* * Scan zonelist, looking for a zone with enough free. * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ /*遍历每个zone，进行分配*/ for_each_zone_zonelist_nodemask(zone, z, zonelist, /*在UMA模式下不成立*/ high_zoneidx, nodemask) { if (NUMA_BUILD && zlc_active && !zlc_zone_worth_trying(zonelist, z, allowednodes)) continue; if ((alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed_softwall(zone, gfp_mask)) goto try_next_zone; BUILD_BUG_ON(ALLOC_NO_WATERMARKS /*需要关注水位*/ if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { unsigned long mark; int ret; /*从flags中取的mark*/ mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; /*如果水位正常，从本zone中分配*/ if (zone_watermark_ok(zone, order, mark, classzone_idx, alloc_flags)) goto try_this_zone; if (zone_reclaim_mode == 0)/*如果上面检查的水位低于正常值，且没有设置页面回收值*/ goto this_zone_full; /*在UMA模式下下面函数直接返回0*/ ret = zone_reclaim(zone, gfp_mask, order); switch (ret) { case ZONE_RECLAIM_NOSCAN: /* did not scan */ goto try_next_zone; case ZONE_RECLAIM_FULL: /* scanned but unreclaimable */ goto this_zone_full; default: /* did we reclaim enough */ if (!zone_watermark_ok(zone, order, mark, classzone_idx, alloc_flags)) goto this_zone_full; } } try_this_zone:/*本zone正常水位*/ /*先从pcp中分配，然后不行的话再从伙伴系统中分配*/ page = buffered_rmqueue(preferred_zone, zone, order, gfp_mask, migratetype); if (page) break; this_zone_full: if (NUMA_BUILD)/*UMA模式为0*/ zlc_mark_zone_full(zonelist, z); try_next_zone: if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) { /* * we do zlc_setup after the first zone is tried but only * if there are multiple nodes make it worthwhile */ allowednodes = zlc_setup(zonelist, alloc_flags); zlc_active = 1; did_zlc_setup = 1; } } if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) { /* Disable zlc cache for second zonelist scan */ zlc_active = 0; goto zonelist_scan; } return page;/*返回页面*/ }

主分配函数

[cpp] view plain copy print ? /* * Really, prep_compound_page() should be called from __rmqueue_bulk(). But * we cheat by calling it from here, in the order > 0 path. Saves a branch * or two. */ /*先考虑从pcp中分配空间，当order大于0时再考虑从伙伴系统中分配*/ static inline struct page *buffered_rmqueue(struct zone *preferred_zone, struct zone *zone, int order, gfp_t gfp_flags, int migratetype) { unsigned long flags; struct page *page; int cold = !!(gfp_flags & __GFP_COLD);/*如果分配参数指定了__GFP_COLD标志，则设置cold标志*/ int cpu; again: cpu = get_cpu(); if (likely(order == 0)) { /*分配一个页面时，使用pcp*/ struct per_cpu_pages *pcp; struct list_head *list; /*找到zone对应的pcp*/ pcp = &zone_pcp(zone, cpu)->pcp; list = &pcp->lists[migratetype];/*pcp中对应类型的list*/ /* 这里需要关中断，因为内存回收过程可能发送核间中断，强制每个核从每CPU 缓存中释放页面。而且中断处理函数也会分配单页。 */ local_irq_save(flags); if (list_empty(list)) { /*如果pcp中没有页面,需要补充*/ /*从伙伴系统中获得batch个页面 batch为一次分配的页面数*/ pcp->count += rmqueue_bulk(zone, 0, pcp->batch, list, migratetype, cold); /*如果链表仍然为空，申请失败返回*/ if (unlikely(list_empty(list))) goto failed; } /* 如果分配的页面不需要考虑硬件缓存(注意不是每CPU页面缓存) ，则取出链表的最后一个节点返回给上层*/ if (cold) page = list_entry(list->prev, struct page, lru); else/* 如果要考虑硬件缓存，则取出链表的第一个页面，这个页面是最近刚释放到每CPU 缓存的，缓存热度更高 */ page = list_entry(list->next, struct page, lru); list_del(&page->lru);/*从pcp中脱离*/ pcp->count--;/*pcp计数减一*/ } else { /*当order为大于1时，不从pcp中分配，直接考虑从伙伴系统中分配*/ if (unlikely(gfp_flags & __GFP_NOFAIL)) { /* * __GFP_NOFAIL is not to be used in new code. * * All __GFP_NOFAIL callers should be fixed so that they * properly detect and handle allocation failures. * * We most definitely don't want callers attempting to * allocate greater than order-1 page units with * __GFP_NOFAIL. */ WARN_ON_ONCE(order > 1); } /* 关中断，并获得管理区的锁*/ spin_lock_irqsave(&zone->lock, flags); /*从伙伴系统中相应类型的相应链表中分配空间*/ page = __rmqueue(zone, order, migratetype); /* 已经分配了1 lru, list); else list_add_tail(&page->lru, list); set_page_private(page, migratetype);/*设置private属性为页面的迁移类型*/ list = &page->lru; } /*递减管理区的空闲页面计数*/ __mod_zone_page_state(zone, NR_FREE_PAGES, -(i lock);/*释放管理区的子璇锁*/ return i; }

3.2 从伙伴系统中取出页面

__rmqueue()函数

[cpp] view plain copy print ? /* * Do the hard work of removing an element from the buddy allocator. * Call me with the zone->lock already held. */ /*采用两种范式试着分配order个page*/ static struct page *__rmqueue(struct zone *zone, unsigned int order, int migratetype) { struct page *page; retry_reserve: /*从指定order开始从小到达遍历,优先从指定的迁移类型链表中分配页面*/ page = __rmqueue_smallest(zone, order, migratetype); /* * 如果满足以下两个条件,就从备用链表中分配页面: * 快速流程没有分配到页面,需要从备用迁移链表中分配. * 当前不是从保留的链表中分配.因为保留的链表是最后可用的链表, * 不能从该链表分配的话,说明本管理区真的没有可用内存了. */ if (unlikely(!page) && migratetype != MIGRATE_RESERVE) { /*order从大到小遍历，从备用链表中分配页面*/ page = __rmqueue_fallback(zone, order, migratetype); /* * Use MIGRATE_RESERVE rather than fail an allocation. goto * is used because __rmqueue_smallest is an inline function * and we want just one call site */ if (!page) { /* 备用链表中没有分配到页面,从保留链表中分配页面了 */ migratetype = MIGRATE_RESERVE; goto retry_reserve;/* 跳转到retry_reserve,从保留的链表中分配页面*/ } } /*调试代码*/ trace_mm_page_alloc_zone_locked(page, order, migratetype); return page; }

3.2.1 从指定的迁移类型链表中分配页面

从指定order开始从小到达遍历,优先从指定的迁移类型链表中分配页面__rmqueue_smallest(zone, order, migratetype);

[cpp] view plain copy print ? /* * Go through the free lists for the given migratetype and remove * the smallest available page from the freelists */ /*从给定的order开始，从小到大遍历；找到后返回页面基址，合并分割后的空间*/ static inline struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, int migratetype) { unsigned int current_order; struct free_area * area; struct page *page; /* Find a page of the appropriate size in the preferred list */ for (current_order = order; current_order area = &(zone->free_area[current_order]);/*得到指定order的area*/ /*如果area指定类型的伙伴系统链表为空*/ if (list_empty(&area->free_list[migratetype])) continue;/*查找下一个order*/ /*对应的链表不空，得到链表中数据*/ page = list_entry(area->free_list[migratetype].next, struct page, lru); list_del(&page->lru);/*从伙伴系统中删除；*/ rmv_page_order(page);/*移除page中order的变量*/ area->nr_free--;/*空闲块数减一*/ /*拆分、合并*/ expand(zone, page, order, current_order, area, migratetype); return page; } return NULL; }

伙伴系统内存块拆分和合并

看一个辅助函数，用于伙伴系统中内存块的拆分、合并

[cpp] view plain copy print ? /* * The order of subdivision here is critical for the IO subsystem. * Please do not alter this order without good reasons and regression * testing. Specifically, as large blocks of memory are subdivided, * the order in which smaller blocks are delivered depends on the order * they're subdivided in this function. This is the primary factor * influencing the order in which pages are delivered to the IO * subsystem according to empirical testing, and this is also justified * by considering the behavior of a buddy system containing a single * large block of memory acted on by a series of small allocations. * This behavior is a critical factor in sglist merging's success. * * -- wli */ /*此函数主要用于下面这种情况: 分配函数从high中分割出去了low大小的内存；然后要将high留下的内存块合并放到伙伴系统中；*/ static inline void expand(struct zone *zone, struct page *page, int low, int high, struct free_area *area, int migratetype) { unsigned long size = 1 low) { /*因为去掉了low的大小，所以最后肯定剩下的是low的大小(2的指数运算)*/ area--;/*减一到order减一的area*/ high--;/*order减一*/ size >>= 1;/*大小除以2*/ VM_BUG_ON(bad_range(zone, &page[size])); /*加到指定的伙伴系统中*/ list_add(&page[size].lru, &area->free_list[migratetype]); area->nr_free++;/*空闲块加一*/ set_page_order(&page[size], high);/*设置相关order*/ } }

3.2.2 从备用链表中分配页面

[cpp] view plain copy print ? /* Remove an element from the buddy allocator from the fallback list */ static inline struct page * __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) { struct free_area * area; int current_order; struct page *page; int migratetype, i; /* Find the largest possible block of pages in the other list */ /* 从最高阶搜索,这样可以尽量的将其他迁移列表中的大块分割,避免形成过多的碎片 */ for (current_order = MAX_ORDER-1; current_order >= order; --current_order) { for (i = 0; i /*回调到下一个migratetype*/ migratetype = fallbacks[start_migratetype][i]; /* MIGRATE_RESERVE handled later if necessary */ /* 本函数不处理MIGRATE_RESERVE类型的迁移链表,如果本函数返回NULL, 则上层函数直接从MIGRATE_RESERVE中分配 */ if (migratetype == MIGRATE_RESERVE) continue;/*访问下一个类型*/ area = &(zone->free_area[current_order]); /*如果指定order和类型的链表为空*/ if (list_empty(&area->free_list[migratetype])) continue;/*访问下一个类型*/ /*得到指定类型和order的页面基址*/ page = list_entry(area->free_list[migratetype].next, struct page, lru); area->nr_free--;/*空闲块数减一*/ /* * If breaking a large block of pages, move all free * pages to the preferred allocation list. If falling * back for a reclaimable kernel allocation, be more * agressive about taking ownership of free pages */ if (unlikely(current_order >= (pageblock_order >> 1)) ||/* 要分割的页面是一个大页面,则将整个页面全部迁移到当前迁移类型的链表中, 这样可以避免过多的碎片 */ start_migratetype == MIGRATE_RECLAIMABLE ||/* 目前分配的是可回收页面,这类页面有突发的特点,将页面全部迁移到可回收链表中, 可以避免将其他迁移链表分割成太多的碎片 */ page_group_by_mobility_disabled) { /* 指定了迁移策略,总是将被分割的页面迁移 */ unsigned long pages; /*移动到先前类型的伙伴系统中*/ pages = move_freepages_block(zone, page, start_migratetype); /* Claim the whole block if over half of it is free */ /* pages是移动的页面数,如果可移动的页面数量较多, 则将整个大内存块的迁移类型修改 */ if (pages >= (1 lru); rmv_page_order(page); /* Take ownership for orders >= pageblock_order */ if (current_order >= pageblock_order)//大于pageblock_order的部分设置相应标示 /*这个不太可能，因为pageblock_order为10*/ change_pageblock_range(page, current_order, start_migratetype); /*拆分和合并*/ expand(zone, page, order, current_order, area, migratetype); trace_mm_page_alloc_extfrag(page, order, current_order, start_migratetype, migratetype); return page; } } return NULL; }

备用链表

[cpp] view plain copy print ? /* * This array describes the order lists are fallen back to when * the free lists for the desirable migrate type are depleted */ /*指定类型的链表为空时，这个数组规定回调的到那个类型的链表*/ static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ };

移动到指定类型的伙伴系统中

[cpp] view plain copy print ? /*将指定区域段的页面移动到指定类型的伙伴系统中，其实就是将页面的类型做了更改，但是是采用移动的方式功能和上面函数类似，但是要求以页面块方式对其*/ static int move_freepages_block(struct zone *zone, struct page *page, int migratetype) { unsigned long start_pfn, end_pfn; struct page *start_page, *end_page; /*如下是对齐操作，其中变量pageblock_nr_pages为MAX_ORDER-1*/ start_pfn = page_to_pfn(page); start_pfn = start_pfn & ~(pageblock_nr_pages-1); start_page = pfn_to_page(start_pfn); end_page = start_page + pageblock_nr_pages - 1; end_pfn = start_pfn + pageblock_nr_pages - 1; /* Do not cross zone boundaries */ if (start_pfn zone_start_pfn) start_page = page; /*结束边界检查*/ if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages) return 0; /*调用上面函数*/ return move_freepages(zone, start_page, end_page, migratetype); } [cpp] view plain copy print ? /* * Move the free pages in a range to the free lists of the requested type. * Note that start_page and end_pages are not aligned on a pageblock * boundary. If alignment is required, use move_freepages_block() */ /*将指定区域段的页面移动到指定类型的伙伴系统中，其实就是将页面的类型做了更改，但是是采用移动的方式*/ static int move_freepages(struct zone *zone, struct page *start_page, struct page *end_page, int migratetype) { struct page *page; unsigned long order; int pages_moved = 0; #ifndef CONFIG_HOLES_IN_ZONE /* * page_zone is not safe to call in this context when * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant * anyway as we check zone boundaries in move_freepages_block(). * Remove at a later date when no bug reports exist related to * grouping pages by mobility */ BUG_ON(page_zone(start_page) != page_zone(end_page)); #endif for (page = start_page; page lru);/*将页面块从原来的伙伴系统链表*/ /*中删除，注意，这里不是一个页面 *而是以该页面的伙伴块*/ list_add(&page->lru,/*添加到指定order和类型下的伙伴系统链表*/ &zone->free_area[order].free_list[migratetype]); page += 1 = MAX_ORDER) { WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); return NULL; } /* * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and * __GFP_NOWARN set) should not cause reclaim since the subsystem * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim * using a larger set of nodes after it has established that the * allowed per node queues are empty and that nodes are * over allocated. */ /** * 调用者指定了GFP_THISNODE标志，表示不能进行内存回收。 * 上层调用者应当在指定了GFP_THISNODE失败后，使用其他标志进行分配。 */ if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE) goto nopage; restart:/*如果调用者没有禁止kswapd，则唤醒该线程进行内存回收。*/ wake_all_kswapd(order, zonelist, high_zoneidx); /* * OK, we're below the kswapd watermark and have kicked background * reclaim. Now things get more complex, so set up alloc_flags according * to how we want to proceed. */ /*根据分配标志确定内部标志，主要是用于水线 */ alloc_flags = gfp_to_alloc_flags(gfp_mask); /** * 与快速分配流程相比，这里的分配标志使用了低的水线。 * 在进行内存回收操作前，我们使用低水线再尝试分配一下。 * 当然，不管是否允许ALLOC_NO_WATERMARKS标志，我们都将它清除。 */ /* This is the last chance, in general, before the goto nopage. */ page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, preferred_zone, migratetype); if (page)/*分配成功，找到页面*/ goto got_pg; rebalance: /* Allocate without watermarks if the context allows */ /* 某些上下文，如内存回收进程及被杀死的任务，都允许它完全突破水线的限制分配内存。 */ if (alloc_flags & ALLOC_NO_WATERMARKS) { page = __alloc_pages_high_priority(gfp_mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, migratetype); if (page))/* 在不考虑水线的情况下，分配到了内存 */ goto got_pg; } /* Atomic allocations - we can't balance anything */ /* 调用者希望原子分配内存，此时不能等待内存回收，返回NULL */ if (!wait) goto nopage; /* Avoid recursion of direct reclaim */ /* 调用者本身就是内存回收进程，不能进入后面的内存回收处理流程，否则死锁 */ if (p->flags & PF_MEMALLOC) goto nopage; /* Avoid allocations with no watermarks from looping endlessly */ /** * 当前线程正在被杀死，它可以完全突破水线分配内存。这里向上层返回NULL，是为了避免系统进入死循环。 * 当然，如果上层调用不允许失败，则死循环继续分配，等待其他线程释放一点点内存。 */ if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) goto nopage; /* Try direct reclaim and then allocating */ /** * 直接在内存分配上下文中进行内存回收操作。 */ page = __alloc_pages_direct_reclaim(gfp_mask, order, zonelist, high_zoneidx, nodemask, alloc_flags, preferred_zone, migratetype, &did_some_progress); if (page))/* 庆幸，回收了一些内存后，满足了上层分配需求 */ goto got_pg; /* * If we failed to make any progress reclaiming, then we are * running out of options and have to consider going OOM */ /* 内存回收过程没有回收到内存，系统真的内存不足了 */ if (!did_some_progress) { /** * 调用者不是文件系统的代码，允许进行文件系统操作，并且允许重试。 * 这里需要__GFP_FS标志可能是进入OOM流程后会杀进程或进入panic，需要文件操作。 */ if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { if (oom_killer_disabled)/* 系统禁止了OOM，向上层返回NULL */ goto nopage; /** * 杀死其他进程后再尝试分配内存 */ page = __alloc_pages_may_oom(gfp_mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, migratetype); if (page) goto got_pg; /* * The OOM killer does not trigger for high-order * ~__GFP_NOFAIL allocations so if no progress is being * made, there are no other options and retrying is * unlikely to help. */)/* 要求的页面数量较多，再试意义不大 */ if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL)) goto nopage; goto restart; } } /* Check if we should retry the allocation */ /* 内存回收过程回收了一些内存，接下来判断是否有必要继续重试 */ pages_reclaimed += did_some_progress; if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { /* Wait for some write requests to complete then retry */ congestion_wait(BLK_RW_ASYNC, HZ/50); goto rebalance; } nopage: /* 内存分配失败了，打印内存分配失败的警告 */ if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) { printk(KERN_WARNING "%s: page allocation failure." " order:%d, mode:0x%x\n", p->comm, order, gfp_mask); dump_stack(); show_mem(); } return page; got_pg: /* 运行到这里，说明成功分配了内存，这里进行内存检测调试 */ if (kmemcheck_enabled) kmemcheck_pagealloc_alloc(page, order, gfp_mask); return page; }

总结：Linux伙伴系统主要分配流程为

正常非配(或叫快速分配)流程：

1，如果分配的是单个页面，考虑从per CPU缓存中分配空间，如果缓存中没有页面，从伙伴系统中提取页面做补充。

2，分配多个页面时，从指定类型中分配，如果指定类型中没有足够的页面，从备用类型链表中分配。最后会试探保留类型链表。

慢速(允许等待和页面回收)分配：

3，当上面两种分配方案都不能满足要求时，考虑页面回收、杀死进程等操作后在试。

【本文地址】

公司简介

联系我们